** Reset the session: drop the data in memory, turn off output paging, drop all
** macros, and close any open log (capture suppresses the error if none is open).
clear
set more off
macro drop all
capture log close
** Put this ado directory at the front of the ado search path.
** NOTE(review): presumably where the user-written commands used below
** (gegen, gduplicates) are installed -- confirm.
adopath ++ /afs/umich.edu/group/m/mmcmps/ado/

/********************************************************************************
Discrimination in Multi-Phase Systems: Evidence from Child Protection
Create a Victim*Year Panel of Maltreatment

Created on: 2/26/19

Last Modified on: 2/20/2024

Description: This do file uses the cleaned DHHS data to create variables related
	     to future maltreatment.
	     
Note that we have removed the file directory names from this program for 
confidentiality reasons.
********************************************************************************/

** Setting the Directory
** The values are intentionally blank here (directory names removed for
** confidentiality, see header). The -use- statements below build paths as
** "${cleandata}analysis_sample.dta" / "${rawdata}allegations.dta", so when these
** are filled in each value should end with a directory separator.
global rawdata 
global cleandata
global tmp

/********************************************************************************/

***********************************
**(1) DATA ON WHETHER CHILDREN ARE SUBJECTS OF FUTURE INV
***********************************

**Load the analysis sample and keep the investigation-level fields needed later.
**Every field except vicid gets a "1" suffix so it can coexist with the
**same-named fields of the other investigations after the join on vicid.
use "${cleandata}analysis_sample.dta", clear
keep vicid inv_caseid complaint_date removal_date fc fc_enddt
local basevars inv_caseid complaint_date removal_date fc fc_enddt
foreach v of local basevars {
	rename `v' `v'1
}
tempfile s
save `s'

**Build a victim*investigation file classifying the alleged perpetrator for all
**maltreatment allegations, including those in the analysis file
use "${rawdata}allegations.dta", clear
rename intakechild vicid
rename investigation inv_caseid
rename relationtype rel
rename relationtype2 rel2
keep vicid inv_caseid rel*
foreach r of varlist rel rel2 {
	replace `r'=upper(`r')
}

**Allegation-level flags: either relationship field matches one of the keywords
local bio_pat "ADOPT|BIO|GUARDIAN|EX|GRAND|HALF|LEGAL|PUTATIVE"
local fos_pat "FOSTER"
gen perp_bio_adopt_tmp=(regexm(rel,"`bio_pat'") | regexm(rel2,"`bio_pat'"))
gen perp_foster_tmp=(regexm(rel,"`fos_pat'") | regexm(rel2,"`fos_pat'"))

**Lift the flags to the victim*investigation level (1 if any allegation matched)
foreach grp in bio_adopt foster {
	by vicid inv_caseid, sort: egen perp_`grp'=max(perp_`grp'_tmp)
	drop perp_`grp'_tmp
}

**if conflicting info, prioritize bio/adoptive relatives
replace perp_foster=0 if perp_bio_adopt==1
gen perp_other=(perp_bio_adopt==0 & perp_foster==0)
la var perp_bio_adopt "Perpetrator was Biological, Adoptive or Legal Child/Grandchild"
la var perp_foster "Perpetrator was in Foster Family"
la var perp_other "Perpetrator was not related, missing or was an aunt/uncle/cousin/not parent figure"

**Keep a single row per victim*investigation
keep vicid inv_caseid perp*
egen tag=tag(vicid inv_caseid)
keep if tag==1
drop tag
tempfile perp
save `perp'

**All investigations on record, with perpetrator flags attached where available
**(rows that exist only in the perpetrator file are discarded)
use "${cleandata}master_dhhs.dta", clear
keep vicid inv_caseid complaint_date preponderance
merge 1:1 vicid inv_caseid using `perp', keep(master match) nogenerate

**Pair every investigation of a child with each of that child's analysis-sample
**investigations (the fields suffixed with 1)
joinby vicid using `s' 

order vicid inv_caseid1 complaint_date1 removal_date1 fc_enddt1 inv_caseid complaint_date
sort vicid complaint_date1

**Remove the analysis investigation itself and anything that preceded it
drop if inv_caseid==inv_caseid1 | complaint_date<complaint_date1
**Remove investigations that fall between the analysis complaint and the
**foster care removal date, for children who were placed in foster care
drop if fc1==1 & complaint_date>complaint_date1 & complaint_date<removal_date1

**TIMING INDICATORS
**infc = 1 when the later complaint falls before the end of the foster care spell
**attached to the analysis investigation, or when that spell has no end date.
**fc_enddt1 is referenced by its full name: the unsuffixed "fc_enddt" no longer
**exists after the rename and only resolved through variable abbreviation, which
**fails under -set varabbrev off- or if another fc_enddt* variable is ever added.
gen infc=(fc_enddt1-complaint_date>0 & fc1==1 & fc_enddt1!=.)
replace infc=1 if fc1==1 & fc_enddt1==.

**Days between the analysis complaint and the later complaint
gen diff=complaint_date-complaint_date1
forv t=1/5 {
	**Year-t window is half-open, from 365*(t-1) up to but excluding 365*t days.
	**The previous inrange() call was inclusive on both ends, so a complaint
	**exactly 365, 730, ... days later was counted in two adjacent windows.
	**A missing diff still yields 0 because the upper-bound test fails.
	gen post`t'=(diff>=365*(`t'-1) & diff<365*`t')
	gen infc_post`t'_maltreat_tmp=(infc==1 & post`t'==1)
	**NOTE(review): this maximum is taken within the later investigation
	**(vicid inv_caseid), whereas the other aggregates in this file group by
	**vicid inv_caseid1 -- confirm which grouping is intended.
	bysort vicid inv_caseid: egen infc_post`t'_maltreat=max(infc_post`t'_maltreat_tmp)
	drop infc_post`t'_maltreat_tmp
	la var infc_post`t'_maltreat "In Foster Care `t' Years After Investigation (for maltreatment variables)"
}

**Save an intermediate file at the child*investigation*school-year (SY) level:
**inv=1 marks a later investigation in that SY and inv_sub is the maximum of
**preponderance over the child's later investigations in that SY.
**The in-memory data are restored afterwards.
preserve
keep vicid inv_caseid1 complaint_date preponderance
rename inv_caseid1 inv_caseid
gen year=.
gen complaint_date_year=year(complaint_date)
gen complaint_date_month=month(complaint_date)

**First day of each school year. SY y begins in September of calendar year y-1.
local start2008=mdy(9,4,2007)
local start2009=mdy(9,2,2008)
local start2010=mdy(9,8,2009)
local start2011=mdy(9,7,2010)
local start2012=mdy(9,6,2011)
local start2013=mdy(9,4,2012)
local start2014=mdy(9,3,2013)
local start2015=mdy(9,2,2014)
local start2016=mdy(9,8,2015)
local start2017=mdy(9,5,2016)
gen startdate=.
forv y=2008/2017 {
	**Attach each start date to complaints in the calendar year that contains it.
	**Previously the SY y start (a date in year y-1) was attached to complaints
	**in calendar year y, so September complaints were split on the start day
	**of the previous year.
	replace startdate=`start`y'' if complaint_date_year==year(`start`y'')
}
**No SY2018 start date is defined above. September 2017 complaints keep the
**cutoff day they had before (the day of start2017) -- TODO confirm the actual
**SY2018 start date. Rows assigned to 2018 are dropped below in any case.
replace startdate=`start2017' if complaint_date_year==2017

**January-August belong to the SY ending that calendar year; September is split
**on the start day; October-December belong to the next SY. When startdate is
**missing, September complaints stay in the calendar year (day < missing is true).
replace year=complaint_date_year if inrange(complaint_date_month,1,8)
replace year=complaint_date_year if complaint_date_month==9 & day(complaint_date)<day(startdate)
replace year=complaint_date_year+1 if complaint_date_month==9 & day(complaint_date)>=day(startdate)
replace year=complaint_date_year+1 if complaint_date_month>=10
drop if year==2018
gegen inv_sub=max(preponderance), by(vicid year)
keep vicid year inv_caseid inv_sub
gen inv=1
gduplicates drop
**Path is built the same way as the -use- statements (global ends with the
**separator); "$cleandata/..." pointed at the filesystem root when the global is empty.
save "${cleandata}vic_year_inv.dta", replace
restore

**ANY FUTURE INVESTIGATION
**Every remaining row is a later investigation paired with an analysis
**investigation, so the group size is the number of later investigations.
by vicid inv_caseid1, sort: gen post_n_inv=_N
gen post_inv=post_n_inv>0
la var post_n_inv "# Investigations After the Current One"
la var post_inv "Had an Investigation After the Current One"

**Same measures within each of the five yearly windows, overall and split by
**whether the later investigation occurred during the foster care spell
forvalues yr=1/5 {
	local lo=`yr'-1

	by vicid inv_caseid1, sort: egen post`yr'_n_inv=total(post`yr')
	gen post`yr'_inv=post`yr'_n_inv>0
	by vicid inv_caseid1, sort: egen post`yr'_inv_fc=max(post`yr'==1 & infc==1)
	by vicid inv_caseid1, sort: egen post`yr'_inv_nofc=max(post`yr'==1 & infc==0)

	la var post`yr'_n_inv "# Investigations Between `lo' and `yr' Years After Current One"
	la var post`yr'_inv "Had an Investigation Between `lo' and `yr' Years After Current One"
	la var post`yr'_inv_fc "Had an Investigation while in FC Between `lo' and `yr' Years After Current One"
	la var post`yr'_inv_nofc "Had an Investigation not in FC Between `lo' and `yr' Years After Current One"
}

**ANY FUTURE SUBSTANTIATED INVESTIGATION
**Work on a copy restricted to later investigations with preponderance==1,
**build the counts and flags, reduce to one row per vicid*inv_caseid1 and store
**the result in a tempfile. The global keepvars accumulates the names of the
**variables created here so they can be referenced after the restore.
preserve 
keep if preponderance==1
by vicid inv_caseid1, sort: gen post_n_sub_inv=_N
gen post_sub_inv=post_n_sub_inv>0
la var post_n_sub_inv "# Sub Investigations After the Current One"
la var post_sub_inv "Had a Sub Investigation After the Current One"
global keepvars post_n_sub_inv post_sub_inv

forvalues yr=1/5 {
	local lo=`yr'-1

	by vicid inv_caseid1, sort: egen post`yr'_n_sub_inv=total(post`yr')
	gen post`yr'_sub_inv=post`yr'_n_sub_inv>0
	by vicid inv_caseid1, sort: egen post`yr'_sub_inv_fc=max(post`yr'==1 & infc==1)
	by vicid inv_caseid1, sort: egen post`yr'_sub_inv_nofc=max(post`yr'==1 & infc==0)

	la var post`yr'_n_sub_inv "# Sub Investigations Between `lo' and `yr' Years After Current One"
	la var post`yr'_sub_inv "Had a Sub Investigation Between `lo' and `yr' Years After Current One"
	la var post`yr'_sub_inv_fc "Had a Sub Investigation while in FC Between `lo' and `yr' Years After Current One"
	la var post`yr'_sub_inv_nofc "Had a Sub Investigation not in FC Between `lo' and `yr' Years After Current One"

	global keepvars $keepvars post`yr'_n_sub_inv post`yr'_sub_inv post`yr'_sub_inv_fc post`yr'_sub_inv_nofc
}
keep vicid inv_caseid1 $keepvars

**One row per victim*analysis investigation
egen tag=tag(vicid inv_caseid1)
keep if tag==1
drop tag
tempfile sub
save `sub'
restore

**Attach the substantiated-investigation measures. Rows found only in the
**in-memory data (_merge==1) had no substantiated later investigation, so every
**variable listed in the global keepvars is set to 0 for them.
merge m:1 vicid inv_caseid1 using `sub'
foreach v of global keepvars {
	replace `v'=0 if _merge==1
}
drop _merge

**Collapse back to child*investigation level, and merge back onto the analysis sample.
**Only the post* aggregates are kept. infc and infc_post*_maltreat can differ
**across the later investigations of one analysis investigation, so keeping them
**could leave several rows per vicid*inv_caseid after -duplicates drop- and make
**the following 1:1 merge fail. They are not part of the final output, which
**keeps only post*, so leaving them out here does not change the saved file.
keep vicid inv_caseid1 post*
**post1-post5 are row-level window flags, not investigation-level aggregates
drop post1 post2 post3 post4 post5
duplicates drop
rename inv_caseid1 inv_caseid
tempfile post
save `post'

**Merge the aggregates onto the full analysis sample. Investigations found only
**in the analysis sample (_merge==1) had no later investigation, so all of
**their post* measures are set to 0.
use "${cleandata}analysis_sample.dta", clear
merge 1:1 vicid inv_caseid using `post'
foreach v of varlist post* {
	replace `v'=0 if _merge==1
}
drop _merge
keep vicid inv_caseid post*
sort vicid inv_caseid
compress
**Path is built the same way as the -use- statements (global ends with the
**separator); "$cleandata/..." pointed at the filesystem root when the global is empty.
save "${cleandata}future_maltreatment.dta", replace





















